1   /*
2    * Copyright (C) 2012 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package com.google.common.io;
18  
19  import static com.google.common.base.Preconditions.checkArgument;
20  import static com.google.common.base.Preconditions.checkNotNull;
21  
22  import com.google.common.annotations.Beta;
23  import com.google.common.base.Ascii;
24  import com.google.common.collect.ImmutableList;
25  import com.google.common.hash.Funnels;
26  import com.google.common.hash.HashCode;
27  import com.google.common.hash.HashFunction;
28  import com.google.common.hash.Hasher;
29  
30  import java.io.BufferedInputStream;
31  import java.io.ByteArrayInputStream;
32  import java.io.IOException;
33  import java.io.InputStream;
34  import java.io.InputStreamReader;
35  import java.io.OutputStream;
36  import java.io.Reader;
37  import java.nio.charset.Charset;
38  import java.util.Arrays;
39  import java.util.Iterator;
40  
41  /**
42   * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a
43   * {@code ByteSource} is not an open, stateful stream for input that can be read and closed.
44   * Instead, it is an immutable <i>supplier</i> of {@code InputStream} instances.
45   *
46   * <p>{@code ByteSource} provides two kinds of methods:
47   * <ul>
48   *   <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
49   *   instance each time they are called. The caller is responsible for ensuring that the returned
50   *   stream is closed.
51   *   <li><b>Convenience methods:</b> These are implementations of common operations that are
52   *   typically implemented by opening a stream using one of the methods in the first category, doing
53   *   something and finally closing the stream that was opened.
54   * </ul>
55   *
56   * @since 14.0
57   * @author Colin Decker
58   */
59  public abstract class ByteSource {
60  
61    private static final int BUF_SIZE = 0x1000; // 4K
62  
  /**
   * Constructor for use by subclasses. It takes no arguments and has an empty body; the class is
   * abstract, so instances are only ever created through a concrete subclass.
   */
  protected ByteSource() {}
67  
68    /**
69     * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
70     * as characters using the given {@link Charset}.
71     */
72    public CharSource asCharSource(Charset charset) {
73      return new AsCharSource(charset);
74    }
75  
  /**
   * Opens a new {@link InputStream} for reading from this source. This method should return a new,
   * independent stream each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned stream is closed.
   *
   * @return a newly opened stream positioned at the start of this source
   * @throws IOException if an I/O error occurs in the process of opening the stream
   */
  public abstract InputStream openStream() throws IOException;
85  
86    /**
87     * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
88     * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
89     * delegate to {@link #openStream()} when the stream returned by that method does not benefit
90     * from additional buffering (for example, a {@code ByteArrayInputStream}). This method should
91     * return a new, independent stream each time it is called.
92     *
93     * <p>The caller is responsible for ensuring that the returned stream is closed.
94     *
95     * @throws IOException if an I/O error occurs in the process of opening the stream
96     * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
97     */
98    public InputStream openBufferedStream() throws IOException {
99      InputStream in = openStream();
100     return (in instanceof BufferedInputStream)
101         ? (BufferedInputStream) in
102         : new BufferedInputStream(in);
103   }
104 
105   /**
106    * Returns a view of a slice of this byte source that is at most {@code length} bytes long
107    * starting at the given {@code offset}.
108    *
109    * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
110    */
111   public ByteSource slice(long offset, long length) {
112     return new SlicedByteSource(offset, length);
113   }
114 
115   /**
116    * Returns whether the source has zero bytes. The default implementation is to open a stream and
117    * check for EOF.
118    *
119    * @throws IOException if an I/O error occurs
120    * @since 15.0
121    */
122   public boolean isEmpty() throws IOException {
123     Closer closer = Closer.create();
124     try {
125       InputStream in = closer.register(openStream());
126       return in.read() == -1;
127     } catch (Throwable e) {
128       throw closer.rethrow(e);
129     } finally {
130       closer.close();
131     }
132   }
133 
134   /**
135    * Returns the size of this source in bytes. For most implementations, this is a heavyweight
136    * operation that will open a stream, read (or {@link InputStream#skip(long) skip}, if possible)
137    * to the end of the stream and return the total number of bytes that were read.
138    *
139    * <p>For some sources, such as a file, this method may use a more efficient implementation. Note
140    * that in such cases, it is <i>possible</i> that this method will return a different number of
141    * bytes than would be returned by reading all of the bytes (for example, some special files may
142    * return a size of 0 despite actually having content when read).
143    *
144    * <p>In either case, if this is a mutable source such as a file, the size it returns may not be
145    * the same number of bytes a subsequent read would return.
146    *
147    * @throws IOException if an I/O error occurs in the process of reading the size of this source
148    */
149   public long size() throws IOException {
150     Closer closer = Closer.create();
151     try {
152       InputStream in = closer.register(openStream());
153       return countBySkipping(in);
154     } catch (IOException e) {
155       // skip may not be supported... at any rate, try reading
156     } finally {
157       closer.close();
158     }
159 
160     closer = Closer.create();
161     try {
162       InputStream in = closer.register(openStream());
163       return countByReading(in);
164     } catch (Throwable e) {
165       throw closer.rethrow(e);
166     } finally {
167       closer.close();
168     }
169   }
170 
171   /**
172    * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the
173    * first call to skip threw, in which case skip may just not be supported.
174    */
175   private long countBySkipping(InputStream in) throws IOException {
176     long count = 0;
177     while (true) {
178       // don't try to skip more than available()
179       // things may work really wrong with FileInputStream otherwise
180       long skipped = in.skip(Math.min(in.available(), Integer.MAX_VALUE));
181       if (skipped <= 0) {
182         if (in.read() == -1) {
183           return count;
184         } else if (count == 0 && in.available() == 0) {
185           // if available is still zero after reading a single byte, it
186           // will probably always be zero, so we should countByReading
187           throw new IOException();
188         }
189         count++;
190       } else {
191         count += skipped;
192       }
193     }
194   }
195 
196   private static final byte[] countBuffer = new byte[BUF_SIZE];
197 
198   private long countByReading(InputStream in) throws IOException {
199     long count = 0;
200     long read;
201     while ((read = in.read(countBuffer)) != -1) {
202       count += read;
203     }
204     return count;
205   }
206 
207   /**
208    * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
209    * {@code output}.
210    *
211    * @throws IOException if an I/O error occurs in the process of reading from this source or
212    *     writing to {@code output}
213    */
214   public long copyTo(OutputStream output) throws IOException {
215     checkNotNull(output);
216 
217     Closer closer = Closer.create();
218     try {
219       InputStream in = closer.register(openStream());
220       return ByteStreams.copy(in, output);
221     } catch (Throwable e) {
222       throw closer.rethrow(e);
223     } finally {
224       closer.close();
225     }
226   }
227 
228   /**
229    * Copies the contents of this byte source to the given {@code ByteSink}.
230    *
231    * @throws IOException if an I/O error occurs in the process of reading from this source or
232    *     writing to {@code sink}
233    */
234   public long copyTo(ByteSink sink) throws IOException {
235     checkNotNull(sink);
236 
237     Closer closer = Closer.create();
238     try {
239       InputStream in = closer.register(openStream());
240       OutputStream out = closer.register(sink.openStream());
241       return ByteStreams.copy(in, out);
242     } catch (Throwable e) {
243       throw closer.rethrow(e);
244     } finally {
245       closer.close();
246     }
247   }
248 
249   /**
250    * Reads the full contents of this byte source as a byte array.
251    *
252    * @throws IOException if an I/O error occurs in the process of reading from this source
253    */
254   public byte[] read() throws IOException {
255     Closer closer = Closer.create();
256     try {
257       InputStream in = closer.register(openStream());
258       return ByteStreams.toByteArray(in);
259     } catch (Throwable e) {
260       throw closer.rethrow(e);
261     } finally {
262       closer.close();
263     }
264   }
265 
266   /**
267    * Reads the contents of this byte source using the given {@code processor} to process bytes as
268    * they are read. Stops when all bytes have been read or the consumer returns {@code false}.
269    * Returns the result produced by the processor.
270    *
271    * @throws IOException if an I/O error occurs in the process of reading from this source or if
272    *     {@code processor} throws an {@code IOException}
273    * @since 16.0
274    */
275   @Beta
276   public <T> T read(ByteProcessor<T> processor) throws IOException {
277     checkNotNull(processor);
278 
279     Closer closer = Closer.create();
280     try {
281       InputStream in = closer.register(openStream());
282       return ByteStreams.readBytes(in, processor);
283     } catch (Throwable e) {
284       throw closer.rethrow(e);
285     } finally {
286       closer.close();
287     }
288   }
289 
290   /**
291    * Hashes the contents of this byte source using the given hash function.
292    *
293    * @throws IOException if an I/O error occurs in the process of reading from this source
294    */
295   public HashCode hash(HashFunction hashFunction) throws IOException {
296     Hasher hasher = hashFunction.newHasher();
297     copyTo(Funnels.asOutputStream(hasher));
298     return hasher.hash();
299   }
300 
301   /**
302    * Checks that the contents of this byte source are equal to the contents of the given byte
303    * source.
304    *
305    * @throws IOException if an I/O error occurs in the process of reading from this source or
306    *     {@code other}
307    */
308   public boolean contentEquals(ByteSource other) throws IOException {
309     checkNotNull(other);
310 
311     byte[] buf1 = new byte[BUF_SIZE];
312     byte[] buf2 = new byte[BUF_SIZE];
313 
314     Closer closer = Closer.create();
315     try {
316       InputStream in1 = closer.register(openStream());
317       InputStream in2 = closer.register(other.openStream());
318       while (true) {
319         int read1 = ByteStreams.read(in1, buf1, 0, BUF_SIZE);
320         int read2 = ByteStreams.read(in2, buf2, 0, BUF_SIZE);
321         if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
322           return false;
323         } else if (read1 != BUF_SIZE) {
324           return true;
325         }
326       }
327     } catch (Throwable e) {
328       throw closer.rethrow(e);
329     } finally {
330       closer.close();
331     }
332   }
333 
334   /**
335    * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
336    * the source will contain the concatenated data from the streams of the underlying sources.
337    *
338    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
339    * close the open underlying stream.
340    *
341    * @param sources the sources to concatenate
342    * @return a {@code ByteSource} containing the concatenated data
343    * @since 15.0
344    */
345   public static ByteSource concat(Iterable<? extends ByteSource> sources) {
346     return new ConcatenatedByteSource(sources);
347   }
348 
349   /**
350    * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
351    * the source will contain the concatenated data from the streams of the underlying sources.
352    *
353    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
354    * close the open underlying stream.
355    *
356    * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
357    * method is called. This will fail if the iterator is infinite and may cause problems if the
358    * iterator eagerly fetches data for each source when iterated (rather than producing sources
359    * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
360    * overload if possible.
361    *
362    * @param sources the sources to concatenate
363    * @return a {@code ByteSource} containing the concatenated data
364    * @throws NullPointerException if any of {@code sources} is {@code null}
365    * @since 15.0
366    */
367   public static ByteSource concat(Iterator<? extends ByteSource> sources) {
368     return concat(ImmutableList.copyOf(sources));
369   }
370 
371   /**
372    * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
373    * the source will contain the concatenated data from the streams of the underlying sources.
374    *
375    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
376    * close the open underlying stream.
377    *
378    * @param sources the sources to concatenate
379    * @return a {@code ByteSource} containing the concatenated data
380    * @throws NullPointerException if any of {@code sources} is {@code null}
381    * @since 15.0
382    */
383   public static ByteSource concat(ByteSource... sources) {
384     return concat(ImmutableList.copyOf(sources));
385   }
386 
387   /**
388    * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
389    * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
390    *
391    * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
392    */
393   public static ByteSource wrap(byte[] b) {
394     return new ByteArrayByteSource(b);
395   }
396 
397   /**
398    * Returns an immutable {@link ByteSource} that contains no bytes.
399    *
400    * @since 15.0
401    */
402   public static ByteSource empty() {
403     return EmptyByteSource.INSTANCE;
404   }
405 
406   /**
407    * A char source that reads bytes from this source and decodes them as characters using a
408    * charset.
409    */
410   private final class AsCharSource extends CharSource {
411 
412     private final Charset charset;
413 
414     private AsCharSource(Charset charset) {
415       this.charset = checkNotNull(charset);
416     }
417 
418     @Override
419     public Reader openStream() throws IOException {
420       return new InputStreamReader(ByteSource.this.openStream(), charset);
421     }
422 
423     @Override
424     public String toString() {
425       return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
426     }
427   }
428 
429   /**
430    * A view of a subsection of the containing byte source.
431    */
432   private final class SlicedByteSource extends ByteSource {
433 
434     private final long offset;
435     private final long length;
436 
437     private SlicedByteSource(long offset, long length) {
438       checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
439       checkArgument(length >= 0, "length (%s) may not be negative", length);
440       this.offset = offset;
441       this.length = length;
442     }
443 
444     @Override
445     public InputStream openStream() throws IOException {
446       return sliceStream(ByteSource.this.openStream());
447     }
448 
449     @Override
450     public InputStream openBufferedStream() throws IOException {
451       return sliceStream(ByteSource.this.openBufferedStream());
452     }
453 
454     private InputStream sliceStream(InputStream in) throws IOException {
455       if (offset > 0) {
456         try {
457           ByteStreams.skipFully(in, offset);
458         } catch (Throwable e) {
459           Closer closer = Closer.create();
460           closer.register(in);
461           try {
462             throw closer.rethrow(e);
463           } finally {
464             closer.close();
465           }
466         }
467       }
468       return ByteStreams.limit(in, length);
469     }
470 
471     @Override
472     public ByteSource slice(long offset, long length) {
473       checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
474       checkArgument(length >= 0, "length (%s) may not be negative", length);
475       long maxLength = this.length - offset;
476       return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
477     }
478 
479     @Override
480     public boolean isEmpty() throws IOException {
481       return length == 0 || super.isEmpty();
482     }
483 
484     @Override
485     public String toString() {
486       return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
487     }
488   }
489 
490   private static class ByteArrayByteSource extends ByteSource {
491 
492     protected final byte[] bytes;
493 
494     protected ByteArrayByteSource(byte[] bytes) {
495       this.bytes = checkNotNull(bytes);
496     }
497 
498     @Override
499     public InputStream openStream() {
500       return new ByteArrayInputStream(bytes);
501     }
502 
503     @Override
504     public InputStream openBufferedStream() throws IOException {
505       return openStream();
506     }
507 
508     @Override
509     public boolean isEmpty() {
510       return bytes.length == 0;
511     }
512 
513     @Override
514     public long size() {
515       return bytes.length;
516     }
517 
518     @Override
519     public byte[] read() {
520       return bytes.clone();
521     }
522 
523     @Override
524     public long copyTo(OutputStream output) throws IOException {
525       output.write(bytes);
526       return bytes.length;
527     }
528 
529     @Override
530     public <T> T read(ByteProcessor<T> processor) throws IOException {
531       processor.processBytes(bytes, 0, bytes.length);
532       return processor.getResult();
533     }
534 
535     @Override
536     public HashCode hash(HashFunction hashFunction) throws IOException {
537       return hashFunction.hashBytes(bytes);
538     }
539 
540     // TODO(user): Possibly override slice()
541 
542     @Override
543     public String toString() {
544       return "ByteSource.wrap("
545           + Ascii.truncate(BaseEncoding.base16().encode(bytes), 30, "...") + ")";
546     }
547   }
548 
549   private static final class EmptyByteSource extends ByteArrayByteSource {
550 
551     private static final EmptyByteSource INSTANCE = new EmptyByteSource();
552 
553     private EmptyByteSource() {
554       super(new byte[0]);
555     }
556 
557     @Override
558     public CharSource asCharSource(Charset charset) {
559       checkNotNull(charset);
560       return CharSource.empty();
561     }
562 
563     @Override
564     public byte[] read() {
565       return bytes; // length is 0, no need to clone
566     }
567 
568     @Override
569     public String toString() {
570       return "ByteSource.empty()";
571     }
572   }
573 
574   private static final class ConcatenatedByteSource extends ByteSource {
575 
576     private final Iterable<? extends ByteSource> sources;
577 
578     ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
579       this.sources = checkNotNull(sources);
580     }
581 
582     @Override
583     public InputStream openStream() throws IOException {
584       return new MultiInputStream(sources.iterator());
585     }
586 
587     @Override
588     public boolean isEmpty() throws IOException {
589       for (ByteSource source : sources) {
590         if (!source.isEmpty()) {
591           return false;
592         }
593       }
594       return true;
595     }
596 
597     @Override
598     public long size() throws IOException {
599       long result = 0L;
600       for (ByteSource source : sources) {
601         result += source.size();
602       }
603       return result;
604     }
605 
606     @Override
607     public String toString() {
608       return "ByteSource.concat(" + sources + ")";
609     }
610   }
611 }